##%%%%%%%%%%%%%%%%%%%%%%%%%   ANAL.R  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
## In this script, we perform analysis of the merged data 
## generated by the script "MERGE.R"

library(Matrix)
library(MASS) ## For ginv() - Moore penrose inverse
library(ggplot2)
library(stringr) ## for string manipulation

##%%%%%%%% Set Working Directory %%%%%%%%%%%%
WorkDir="D:/RWORK/Repository Files"
setwd(WorkDir)

##%%%%%%%%%% Read in function files %%%%%%%%%

source("FUNC.R")

##%%%%%%%%%%%%%%%%%%%%%%%%% Load Data %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
load("rsqAA")
load("rsqMedAA")
load("rsqInpMedAA")

##%%%%%%%%% Collect cross model - sim stats %%%%%%%%%%%%%%%%%%%%
##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
## Find first instance of every input group
jjModStats=c(2:13)
# jjStdStats=c(6:13)
rN=rownames(rsqInpMedAA)
simNms=substring(rN,1,7)

## Add back .5
dd=substring(simNms,nchar(simNms),nchar(simNms))=="."
simNms[dd]=paste(simNms[dd],"5",sep="")

## Strip white space
ww=substring(simNms,nchar(simNms),nchar(simNms))==" "
simNms[ww]=substring(simNms[ww],1,nchar(simNms[ww])-1)
ww=substring(simNms,nchar(simNms),nchar(simNms))==" "
simNms[ww]=substring(simNms[ww],1,nchar(simNms[ww])-1)


nR=length(simNms)
ii1=c(1,(2:nR)[simNms[1:(nR-1)]!=simNms[2:nR]])

modStatsSimAA=rsqInpMedAA[ii1,jjModStats]

## Strip white space
simNmsS=simNms[ii1]

## Add proper rownames
rownames(modStatsSimAA)=simNmsS

## Get modStatSim that match original data

modNamesN1=c("A2t1N1","A2t2N1","A3t1N1","A3t2N1","A4t1N1","A4t2N1","A4St1N1",
			"A4St2N1","A7St1N1","A7St2N1","A7t1N1","A7t2N1") 

modNamesN.5=c("A2t1N.5","A2t2N.5","A3t1N.5","A3t2N.5","A4t1N.5","A4t2N.5","A4St1N.5",
			"A4St2N.5","A7St1N.5","A7St2N.5","A7t1N.5","A7t2N.5")

modStatsSimN1AA=modStatsSimAA[modNamesN1,]
modStatsSimN.5AA=modStatsSimAA[modNamesN.5,]

##%%%% Create modStatsSimN1s, modStatsSimN.5s for paper %%%%%%%%
foo=modStatsSimN1AA[,"cv.r.squaredI"]/modStatsSimN1AA[,"r.squaredI"]
cols=c("numb. inputs w. pval<=0.1","n-inputs","r.squaredI","cv.r.squaredI")
modStatsSimN1s=cbind(modStatsSimN1AA[,cols],foo)
colnames(modStatsSimN1s)[ncol(modStatsSimN1s)]="cv.rsq/in.rsq"


foo=modStatsSimN.5AA[,"cv.r.squaredI"]/modStatsSimN.5AA[,"r.squaredI"]
cols=c("numb. inputs w. pval<=0.1","n-inputs","r.squaredI","cv.r.squaredI")
modStatsSimN.5s=cbind(modStatsSimN.5AA[,cols],foo)
colnames(modStatsSimN.5s)[ncol(modStatsSimN.5s)]="cv.rsq/in.rsq"


##%%%%%%%%%%%%% Save These

# write.csv(modStatsSimN1s,file="modStatsSimN1.csv")
# write.csv(modStatsSimN.5s,file="modStatsSimN.5.csv")



##%%%%%%%%%%%%%%%%%% Define stats used in analysis %%%%%%%%%%%%%%%%%%%
##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

modDF=rsqAA[,"df_modI"]
fstat=rsqAA[,"fstat"]
outL.fstat=rsqAA[,"outL.fstat"]
cv.fstat=rsqAA[,"cv.fstat"]

nPval=rsqAA[,"n-pval<=0.1"]
rsqOutVsIn=rsqAA[,"outL.r.squaredI"]/rsqAA[,"r.squaredI"]

probF=rsqAA[,"fstat p-value"]
outL.probF=rsqAA[,"outL.fstat p-value"]

pVal=probF

prOutLbel95=rsqAA[,"pr outL.fstat<95% lowb"]
prOutLbel99=rsqAA[,"pr outL.fstat<99% lowb"]
prOutLbel0=rsqAA[,"pr outL.fstat<=0"]


##%%%%%%%%%%% Comparison of cv. with other stats-aggregates %%%%%%%%%
tstat=abs(rsqAA[,"t value"])
in.tstatF=rsqAA[,"tstatF"]
outL.tstatF=rsqAA[,"outL.tstatF"]
cv.tstatF=rsqAA[,"cv.tstatF"]

##%%%%% Rsq ratio's-aggregates %%%%%%%%%
rsqIn=rsqAA[,"r.squaredI"]
rsqOutL=rsqAA[,"outL.r.squaredI"]
rsqCv=rsqAA[,"cv.r.squaredI"]

rsqOutLvsIn=rsqOutL/rsqIn
rsqCvVsIn=rsqCv/rsqIn


##%%%%%%%%%%%%%% Overfitting measure bins %%%%%%%%%%%%%%%
# > quantile(rsqOutLvsIn,(0:5)/5)
#         0%        20%        40%        60%        80%       100% 
# -1.7859282  0.1943516  0.5290193  0.7031844  0.8354652  1.5516680 

rsqQ1=rsqOutLvsIn<0.2
rsqQ2=rsqOutLvsIn>=0.2&rsqOutLvsIn<0.5
rsqQ3=rsqOutLvsIn>=0.5&rsqOutLvsIn<0.7
rsqQ4=rsqOutLvsIn>=0.7&rsqOutLvsIn<0.85
rsqQ5=rsqOutLvsIn>=0.85

rsqQL=rsqOutLvsIn<0.5
rsqQU=rsqOutLvsIn>=0.5

cv.rsqQ1=rsqCvVsIn<0.2
cv.rsqQ2=rsqCvVsIn>=0.2&rsqCvVsIn<0.5
cv.rsqQ3=rsqCvVsIn>=0.5&rsqCvVsIn<0.7
cv.rsqQ4=rsqCvVsIn>=0.7&rsqCvVsIn<0.85
cv.rsqQ5=rsqCvVsIn>=0.85

cv.rsqQL=rsqCvVsIn<0.5
cv.rsqQU=rsqCvVsIn>=0.5


##%%%%%%%%%%%% n-obs binning
nobsQ500=modDF<1000
nobsQ1125=modDF>=1000&modDF<2000
nobsQ2500=modDF>=2000&modDF<4000
nobsQ5000=modDF>=4000

## n-inputs
ninsQ10=rsqAA[,"n-inputs"]<20
ninsQ30=rsqAA[,"n-inputs"]>=20&rsqAA[,"n-inputs"]<40
ninsQ60=rsqAA[,"n-inputs"]>=40



##%%%%%%% binsides based on p-values and fstat %%%%%%%%
qqPval=c(0,0.0001,0.001,0.01,0.025,0.05,0.075,0.11,1)

# > round(qf(1-qqPval[length(qqPval):1],1,5000),2)
# [1]  2.56  3.17  3.84  5.03  6.64 10.84 15.16   Inf
qqFstat=c(0,2,3,3.5,4,5,7,10,15,200)

qqTstat=c(0,1.5,1.8,1.95,2.2,2.5,3,3.5,4,20)



##%%%%%%%%%%%%%%%% rsqOI by no.-inputs, data set size %%%%%%%%%%%%%%%%%%
##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
## Bin rsqOI on number of inputs and data set size
rsqOI=rsqOutLvsIn

nobsQ500=modDF<1000
nobsQ1125=modDF>=1000&modDF<2000
nobsQ2500=modDF>=2000&modDF<4000
nobsQ5000=modDF>=4000

## n-inputs
ninsQ10=rsqAA[,"n-inputs"]<20
ninsQ30=rsqAA[,"n-inputs"]>=20&rsqAA[,"n-inputs"]<40
ninsQ60=rsqAA[,"n-inputs"]>=40

ninsM=cbind(ninsQ10,ninsQ30,ninsQ60)
nobsM=cbind(nobsQ5000,nobsQ2500,nobsQ1125,nobsQ500)

foo10=c(mean(rsqOI[ninsM[,1]&nobsM[,1]]),mean(rsqOI[ninsM[,1]&nobsM[,2]]),
			mean(rsqOI[ninsM[,1]&nobsM[,3]]),mean(rsqOI[ninsM[,1]&nobsM[,4]]))
foo30=c(mean(rsqOI[ninsM[,2]&nobsM[,1]]),mean(rsqOI[ninsM[,2]&nobsM[,2]]),
			mean(rsqOI[ninsM[,2]&nobsM[,3]]),mean(rsqOI[ninsM[,2]&nobsM[,4]]))
foo60=c(mean(rsqOI[ninsM[,3]&nobsM[,1]]),mean(rsqOI[ninsM[,3]&nobsM[,2]]),
			mean(rsqOI[ninsM[,3]&nobsM[,3]]),mean(rsqOI[ninsM[,3]&nobsM[,4]]))

rsqInsObs=rbind(foo10,foo30,foo60)

colnames(rsqInsObs)=c("N=5000","N=2500","N=1125","N=500")
rownames(rsqInsObs)=c("n-ind.vars~10","n-ind.vars~30","n-ind.vars~60")

# > round(rsqInsObs,3)
#               N=5000 N=2500 N=1125  N=500
# n-ind.vars~10  0.975  0.953  0.905  0.798
# n-ind.vars~30  0.845  0.716  0.458  0.072
# n-ind.vars~60  0.735  0.531  0.180 -0.290


overfit.rsqInsObs=rsqInsObs
# write.csv(overfit.rsqInsObs,file="overfit.rsqInsObs.csv")


##%%%%%%%%%%%% Show fstat and TtoF aren't sensitive to df2 %%%%%%%%
# > min(rsqAA[,"df_modD"])
# [1] 431
# > max(rsqAA[,"df_modD"])
# [1] 4990
qqFstatS=c(0,2.5,4,5,7,11,200)
ll=2:(length(qqFstatS)-1)
q431=pf(qqFstatS[ll],1,431)
q4990=pf(qqFstatS[ll],1,4900)
t431=FtoT(qqFstatS[ll],1,431)
t4990=FtoT(qqFstatS[ll],1,4900)

dfNotImp=rbind(q431,q4990,t431,t4990)
colnames(dfNotImp)=paste("fstat=",qqFstatS[ll],sep="")
rownames(dfNotImp)=c("fstat.p-value df2=431","fstat.p-value df2=490","FtoT df=431","FtoT df2=4900")

# > dfNotImp
#                       fstat=2.5   fstat=4   fstat=5   fstat=7  fstat=11
# fstat.p-value df2=431 0.8854202 0.9538727 0.9741416 0.9915509 0.9990117
# fstat.p-value df2=490 0.8860892 0.9544446 0.9746078 0.9918230 0.9990822
# FtoT df=431           1.5811388 2.0000000 2.2360680 2.6457513 3.3166248
# FtoT df2=4900         1.5811388 2.0000000 2.2360680 2.6457513 3.3166248

overfit.dfNotImp=dfNotImp

# write.csv(overfit.dfNotImp,file="D:\\RWORK\\EffectPaper\\overfit.dfNotImp.csv")


##%%%%%%%%%%%%%%%% Selection Effect Tables %%%%%%%%%%%%%%%%
##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
##%%%% Table for selection effect for less overfit models

qqTstatS=c(0,1.5,1.8,1.95,2.5,3.0,4.0,20)

fooIFQ5T=MultiBinM(rsqAA[rsqQ5,"tstatF"],fstat[rsqQ5],binSides=qqTstatS)
fooOFQ5T=MultiBinM(rsqAA[rsqQ5,"tstatF"],outL.fstat[rsqQ5],binSides=qqTstatS)
fooNpvalQ5T=MultiBin(rsqAA[rsqQ5,"tstatF"],nPval[rsqQ5],binSides=qqTstatS)
foo=cbind(fooNpvalQ5T[[1]],fooIFQ5T[[1]],fooOFQ5T[[1]])
fooAT=round(cbind(foo,FtoT(foo[,2:3],1,2500)),3)
colnames(fooAT)=c("nPval","fstat","out.fstat","tstat","out.tstat")
fooAT[,1]=round(fooAT[,1])

selectionEffectQ5T=fooAT
# > fooAT
#             nPval  fstat out.fstat tstat out.tstat
# 0<x<=1.5       48  0.465     0.030 0.682     0.172
# 1.5<x<=1.8     83  2.704     1.613 1.644     1.270
# 1.8<x<=1.95    96  3.506     2.248 1.872     1.499
# 1.95<x<=2.5   114  4.853     3.597 2.203     1.896
# 2.5<x<=3      141  7.468     5.789 2.733     2.406
# 3<x<=4        170 11.832     9.586 3.440     3.096
# 4<x<=20       193 30.023    26.918 5.479     5.188


# write.csv(selectionEffectQ5T,file="D:\\RWORK\\EffectPaper\\selectionEffectQ5T.csv")
## Drops 1 in fstat, 0.4 in tstat


##%%%%%% Table for selection effect for more overfit models %%%%%%
fooIFQLT=MultiBinM(rsqAA[rsqQL,"tstatF"],fstat[rsqQL],binSides=qqTstatS)
fooOFQLT=MultiBinM(rsqAA[rsqQL,"tstatF"],outL.fstat[rsqQL],binSides=qqTstatS)
fooNpvalQLT=MultiBinM(rsqAA[rsqQL,"tstatF"],nPval[rsqQL],binSides=qqTstatS)
foo=cbind(fooNpvalQLT[[1]],fooIFQLT[[1]],fooOFQLT[[1]])
fooLT=round(cbind(foo,FtoT(foo[,2:3],1,2500)),3)
colnames(fooLT)=c("nPval","fstat","out.fstat","tstat","out.tstat")
fooLT[,1]=round(fooLT[,1])

selectionEffectQLT=fooLT
# > fooLT
#             nPval  fstat out.fstat tstat out.tstat
# 0<x<=1.8       25  0.482    -0.114 0.695     0.000
# 1.8<x<=1.95    35  3.501    -1.165 1.871     0.000
# 1.95<x<=2.5    43  4.743    -1.203 2.178     0.000
# 2.5<x<=3       67  7.316    -0.631 2.705     0.000
# 3<x<=4        121 11.165     1.622 3.341     1.274
# 4<x<=20       191 21.079    10.507 4.591     3.242

# write.csv(selectionEffectQLT,file="D:\\RWORK\\EffectPaper\\selectionEffectQLT.csv")

##%%%%%%%%%%%%%%%%% Misestimate Effct %%%%%%%%%%%%%%%%%%%%%%%%%
##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
## This is to look at misestimate errors, considering
## the median's across models as a good estimate of
## the "true value" of the statistic (what one would
## get running N-models, N->inf).

##%%%%%%%%% Look at tstatMed deltas conditioned on in.tstat %%%%%%%%
qqTstat=c(0,1.5,1.8,1.95,2.2,2.5,3,3.5,4,20)
qqTstatS=c(0,1.8,1.95,2.5,3,4,20)
tstatDmed=abs(rsqAA[,"tstatF"])-abs(rsqMedAA[,"tstatF"])
outL.tstatDmed=abs(rsqAA[,"outL.tstatF"])-abs(rsqMedAA[,"outL.tstatF"])

fooTD=MultiBin(abs(rsqAA[,"t value"]),tstatDmed,binSides=qqTstat)
fooTDO=MultiBin(abs(rsqAA[,"t value"]),outL.tstatDmed,binSides=qqTstat)
misestTstat=round(cbind(t(fooTD[[3]]),fooTDO[[1]],fooTD[[1]]),4)
colnames(misestTstat)=c("in.tstat","outL.tstatDmed","in.tstatDmed")

misestTstatStd=round(cbind(fooTD[[1]],fooTD[[2]],fooTDO[[1]],fooTDO[[2]]),4)
colnames(misestTstatStd)=c("in.tstatDmed","in.tstatDmed.std","outL.tstatDmed",
					"outL.tstatDmed.std")


# write.csv(misestTstatStd,file="D:\\RWORK\\EffectPaper\\misestTstatStd.csv")


# > misestTstatStd
#             in.tstatDmed in.tstatDmed.std outL.tstatDmed outL.tstatDmed.std
# 0<x<=1.5         -1.3889           0.0008         0.1703             0.0007
# 1.5<x<=1.8       -0.5108           0.0015         0.1691             0.0022
# 1.8<x<=1.95      -0.3193           0.0027         0.1330             0.0033
# 1.95<x<=2.2      -0.1937           0.0026         0.0929             0.0027
# 2.2<x<=2.5       -0.0107           0.0032         0.0249             0.0030
# 2.5<x<=3          0.2079           0.0037        -0.0683             0.0031
# 3<x<=3.5          0.4193           0.0058        -0.1723             0.0047
# 3.5<x<=4          0.5651           0.0086        -0.2349             0.0067
# 4<x<=20           0.6167           0.0069        -0.1787             0.0042
## This shows the same statistics with standard errors which are very small

## Here we see that the in.tstatDmed tends to be negative
## for lower tstat-values (higher likelihood of type II
## errors ~ downward misestimates).
## Next, we find positive values of in.tstatDmed for higher 
## tstat-values (more frequent cases of type I errors ~ 
## upward misestimates).
##
## As one can then expect given our previous analysis, one
## then sees in outL.tstatDmed sign flip and negative
## correlation between the in and out tstatDmed's. 
## As such, out of sample the misestimate errors found
## in sample will tend to revert slightly up for low t-values
## (slightly countering the selection effect, which is more
## likely for low tstat variables).


##%%%%%%%%%%%%%%%%%%%%% Overfitting Effect %%%%%%%%%%%%%%%%%%%%%%%%
##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


##%%%%%%%%%%%%%% Overfitting-Confidence intervals %%%%%%%%%%%%%%%%%
##             %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
## prOutLbel95, prOutLbel99, prOutLbel0
prOutLbel95=rsqAA[,"pr outL.fstat<95% lowb"]
prOutLbel99=rsqAA[,"pr outL.fstat<99% lowb"]
prOutLbel0=rsqAA[,"pr outL.fstat<=0"]

## Coverage probabilities 95% lower bounds

## Look at this binning on tstat
qqTstatSS=c(0.00,  1.80,  1.95,  2.50,  3.00,  4.00, 20.00)
Pr95Q1T=MultiBin(rsqAA[rsqQ1,"tstatF"],prOutLbel95[rsqQ1],binSides=qqTstatSS,rcor="rank")
Pr95Q2T=MultiBin(rsqAA[rsqQ2,"tstatF"],prOutLbel95[rsqQ2],binSides=qqTstatSS,rcor="rank")
Pr95Q3T=MultiBin(rsqAA[rsqQ3,"tstatF"],prOutLbel95[rsqQ3],binSides=qqTstatSS,rcor="rank")
Pr95Q4T=MultiBin(rsqAA[rsqQ4,"tstatF"],prOutLbel95[rsqQ4],binSides=qqTstatSS,rcor="rank")
Pr95Q5T=MultiBin(rsqAA[rsqQ5,"tstatF"],prOutLbel95[rsqQ5],binSides=qqTstatSS,rcor="rank")

prOutLbel95MAT=round(cbind(Pr95Q1T[[1]],Pr95Q2T[[1]],Pr95Q3T[[1]],Pr95Q4T[[1]],Pr95Q5T[[1]]),3)
colnames(prOutLbel95MAT)=c("prOutLbel95 rsqQ1","prOutLbel95 rsqQ2",
					"prOutLbel95 rsqQ3","prOutLbel95 rsqQ4","prOutLbel95 rsqQ5")

overfit.prOutLbel95MAT=prOutLbel95MAT
# write.csv(overfit.prOutLbel95MAT,file="D:\\RWORK\\EffectPaper\\overfit.prOutLbel95MAT.csv")

# > prOutLbel95MAT
#             prOutLbel95 rsqQ1 prOutLbel95 rsqQ2 prOutLbel95 rsqQ3 prOutLbel95 rsqQ4 prOutLbel95 rsqQ5
# 0<x<=1.8                0.628             0.557             0.507             0.482             0.456
# 1.8<x<=1.95             0.723             0.574             0.459             0.394             0.290
# 1.95<x<=2.5             0.729             0.540             0.425             0.345             0.250
# 2.5<x<=3                0.722             0.524             0.414             0.312             0.210
# 3<x<=4                  0.704             0.478             0.369             0.293             0.176
# 4<x<=20                 0.671             0.433             0.316             0.240             0.146

## Looking at the least overfit models (rsqQ5) we find that between 15% and 47%
## of outL.fstats lie below the 95% lower bound computed from the in.fstat
## with the worst coverage probabilities for the smaller values of in.tstat.
## This shows that even with little overfitting, in sample computed coverage
## probabilities can't be expected to apply to true (not refitted) out of
## sample results.
##
## For more overfit models, this is even worse, with often more than 50% of 
## out of sample values being below the 95% lower bound.
## Coverage probabilities 95% lower bounds

 
##%%%%%%%%%%%%%% Look at this for prOutLbel0 %%%%%%%%%%%%%%%%%%%%%
qqTstatSS=c(0.00,  1.80,  1.95,  2.50,  3.00,  4.00, 20.00)
Pr0Q1T=MultiBin(rsqAA[rsqQ1,"tstatF"],prOutLbel0[rsqQ1],binSides=qqTstatSS,rcor="rank")
Pr0Q2T=MultiBin(rsqAA[rsqQ2,"tstatF"],prOutLbel0[rsqQ2],binSides=qqTstatSS,rcor="rank")
Pr0Q3T=MultiBin(rsqAA[rsqQ3,"tstatF"],prOutLbel0[rsqQ3],binSides=qqTstatSS,rcor="rank")
Pr0Q4T=MultiBin(rsqAA[rsqQ4,"tstatF"],prOutLbel0[rsqQ4],binSides=qqTstatSS,rcor="rank")
Pr0Q5T=MultiBin(rsqAA[rsqQ5,"tstatF"],prOutLbel0[rsqQ5],binSides=qqTstatSS,rcor="rank")

prOutLbel0MAT=round(cbind(Pr0Q1T[[1]],Pr0Q2T[[1]],Pr0Q3T[[1]],Pr0Q4T[[1]],Pr0Q5T[[1]]),3)
colnames(prOutLbel0MAT)=c("prOutLbel0 rsqQ1","prOutLbel0 rsqQ2",
					"prOutLbel0 rsqQ3","prOutLbel0 rsqQ4","prOutLbel0 rsqQ5")

overfit.prOutLbel0MAT=prOutLbel0MAT

# write.csv(overfit.prOutLbel0MAT,file="D:\\RWORK\\EffectPaper\\overfit.prOutLbel0MAT.csv")


##%%%%%%%%%%%%%%%%%%%% In/out fstat-tstat %%%%%%%%%%%%%%%%%%%%
##            %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
## In vs. out of sample fstat by rsqQ1-5. !!NOTE!! Here we use medians
## to reduce impact of very large values
qqFstatS=c(0,2.5,4,5,7,11,200)

FfooQ1=MultiBinM(fstat[rsqQ1],outL.fstat[rsqQ1],binSides=qqFstatS)
FfooQ2=MultiBinM(fstat[rsqQ2],outL.fstat[rsqQ2],binSides=qqFstatS)
FfooQ3=MultiBinM(fstat[rsqQ3],outL.fstat[rsqQ3],binSides=qqFstatS)
FfooQ4=MultiBinM(fstat[rsqQ4],outL.fstat[rsqQ4],binSides=qqFstatS)
FfooQ5=MultiBinM(fstat[rsqQ5],outL.fstat[rsqQ5],binSides=qqFstatS)

outL.fstatMA=round(cbind(t(FfooQ3[[3]]),FfooQ1[[1]],FfooQ2[[1]],FfooQ3[[1]],FfooQ4[[1]],FfooQ5[[1]]),3)
colnames(outL.fstatMA)=c("in.fstat","outL.fstat rsqQ1","outL.fstat rsqQ2",
					"outL.fstat rsqQ3","outL.fstat rsqQ4","outL.fstat rsqQ5")

# > outL.fstatMA
#           in.fstat outL.fstat rsqQ1 outL.fstat rsqQ2 outL.fstat rsqQ3 outL.fstat rsqQ4 outL.fstat rsqQ5
# 0<x<=2.5     0.710           -0.130           -0.048           -0.005            0.014            0.034
# 2.5<x<=4     3.180           -1.451           -0.370            0.416            0.888            2.024
# 4<x<=5       4.472           -1.774           -0.251            1.030            1.936            3.217
# 5<x<=7       5.909           -2.133            0.220            1.573            2.875            4.419
# 7<x<=11      8.738           -1.879            1.635            3.142            4.494            7.044
# 11<x<=200   23.332            1.452            7.673           11.898           15.603           20.558

## Here we find that in rsqQ1 almost nothing is positive out of
## sample other than the largest in.fstat values. For rsqQ2, this is on average
## true also for in.fstat's<=5 and on average only reaches significance on 
## average for in.fstat's>10.  Even for rsqQ5, there is still a drop of 1-3
## in outL.fstats, due to a combination of p-hacking (for low values of in.fstat)
## and misestimate reversion to the mean (for high values of in.fstat). 

##%%%%%%%%%%%%%%%% In/out tstat by rsqQ1-Q5 %%%%%%%%%%%%%%%%%%%
## Next we bin outL.fstat's on abs("t value"), then afterwards,
## convert the median fstat's to tstatF using FtoT
## In vs. out tstat by rsqQ1-5.
tstat=abs(rsqAA[,"t value"]) 
TfooQ1=MultiBinM(tstat[rsqQ1],outL.fstat[rsqQ1],binSides=qqTstatS)
TfooQ2=MultiBinM(tstat[rsqQ2],outL.fstat[rsqQ2],binSides=qqTstatS)
TfooQ3=MultiBinM(tstat[rsqQ3],outL.fstat[rsqQ3],binSides=qqTstatS)
TfooQ4=MultiBinM(tstat[rsqQ4],outL.fstat[rsqQ4],binSides=qqTstatS)
TfooQ5=MultiBinM(tstat[rsqQ5],outL.fstat[rsqQ5],binSides=qqTstatS)

outL.tstatMA=cbind(t(TfooQ3[[3]]),TfooQ1[[1]],TfooQ2[[1]],TfooQ3[[1]],TfooQ4[[1]],TfooQ5[[1]])
foo=FtoT(outL.tstatMA[,2:ncol(outL.tstatMA)],1,2500)
outL.tstatMA[,2:ncol(outL.tstatMA)]=foo
outL.tstatMA=round(outL.tstatMA,2)
colnames(outL.tstatMA)=c("in.tstat","outL.tstatF rsqQ1","outL.tstatF rsqQ2",
					"outL.tstatF rsqQ3","outL.tstatF rsqQ4","outL.tstatF rsqQ5")

# > outL.tstatMA
#             in.tstat outL.tstatF rsqQ1 outL.tstatF rsqQ2 outL.tstatF rsqQ3 outL.tstatF rsqQ4 outL.tstatF rsqQ5
# 0<x<=1.5        0.69              0.00              0.00              0.00              0.10              0.17
# 1.5<x<=1.8      1.65              0.00              0.00              0.48              0.86              1.27
# 1.8<x<=1.95     1.87              0.00              0.00              0.70              1.07              1.50
# 1.95<x<=2.5     2.21              0.00              0.00              1.08              1.44              1.90
# 2.5<x<=3        2.74              0.00              0.94              1.55              1.95              2.41
# 3<x<=4          3.43              0.00              1.88              2.34              2.66              3.10
# 4<x<=20         5.35              2.27              3.48              4.20              4.73              5.19

## We note that the FtoT transformation is done on the median fstats for each
## bin and column to mitigate the effect of bad transforms for very small/negative
## and very large outL.fstats where the transform either doesn't make sense 
## (e.g. for negative outL.fstats) or is subject to numerical errors.
## Although these transformations aren't perfect they at least present
## the results in a form (that is as tstat's) that most researchers are
## are more familiar with. 
##
## Here we see that for the least overfit models (rsqQ5) the out of sample tstat's drop around
## 0.3 standard deviation's.  For the smaller in.tstats this is probably due to p-search effects
## while for the larger in.tstats (where p-search effects are very rare on models with little 
## overfitting) they are probably due to misestimation effects (reversion to the mean).
##
## For the more overfit models, out of sample performance drops are dramatic.  For rsqQ1 and rsqQ2
## they are around two standard deviations, reflecting both the drop in overall model performance
## and the increased incidence of p-search effects in overfit models.
## 
## For moderately overfit models (rsqQ3, rsqQ4) the drop is around one standard deviation (slightly
## more for rsqQ3).

##%%%%%%%%%%%%% Save these %%%%%%%%%%%
overfitOutL.fstatMA=outL.fstatMA
overfitOutL.tstatMA = outL.tstatMA

# write.csv(overfitOutL.fstatMA,file="D:\\RWORK\\EffectPaper\\overfitOutL.fstatMA.csv")
# write.csv(overfitOutL.tstatMA,file="D:\\RWORK\\EffectPaper\\overfitOutL.tstatMA.csv")



##%%%%%%%%%%%%%%%%%%%% Detecting Overfitting with cv %%%%%%%%%%%%%%%%%%%%%%%%
##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
cvTfooQ1=MultiBinM(tstat[cv.rsqQ1],outL.fstat[cv.rsqQ1],binSides=qqTstatS)
cvTfooQ2=MultiBinM(tstat[cv.rsqQ2],outL.fstat[cv.rsqQ2],binSides=qqTstatS)
cvTfooQ3=MultiBinM(tstat[cv.rsqQ3],outL.fstat[cv.rsqQ3],binSides=qqTstatS)
cvTfooQ4=MultiBinM(tstat[cv.rsqQ4],outL.fstat[cv.rsqQ4],binSides=qqTstatS)
cvTfooQ5=MultiBinM(tstat[cv.rsqQ5],outL.fstat[cv.rsqQ5],binSides=qqTstatS)

cVoutL.tstatMA=cbind(t(cvTfooQ3[[3]]),cvTfooQ1[[1]],cvTfooQ2[[1]],cvTfooQ3[[1]],cvTfooQ4[[1]],cvTfooQ5[[1]])
foo=FtoT(cVoutL.tstatMA[,2:6],1,2500)
cVoutL.tstatMA[,2:ncol(outL.tstatMA)]=foo
cVoutL.tstatMA=round(cVoutL.tstatMA,2)
colnames(cVoutL.tstatMA)=c("in.tstat","outL.tstatF cvrQ1","outL.tstatF cvrQ2",
					"outL.tstatF cvrQ3","outL.tstatF cvrQ4","outL.tstatF cvrQ5")


# > cVoutL.tstatMA
#             in.tstat outL.tstatF cvrQ1 outL.tstatF cvrQ2 outL.tstatF cvrQ3 outL.tstatF cvrQ4 outL.tstatF cvrQ5
# 0<x<=1.8        0.80              0.00              0.00              0.00              0.20              0.09
# 1.8<x<=1.95     1.87              0.00              0.00              0.74              1.08              1.42
# 1.95<x<=2.5     2.21              0.00              0.00              1.01              1.45              1.92
# 2.5<x<=3        2.73              0.00              0.93              1.55              1.87              2.42
# 3<x<=4          3.43              0.62              1.82              2.43              2.50              3.04
# 4<x<=20         5.14              2.42              3.61              4.14              4.72              5.08

## Here we see that the results are similar for conditioning on cv.rsqQ1-5 as 
## seen conditioning in rsqQ1-5, as such, we can use cv.rsq ratio's to detect
## overfitting.

##%%%%%%%%%%%%%%%%%%%%%% Seve these %%%%%%
overfit.CVoutL.tstatMA=cVoutL.tstatMA

# save(overfit.CVoutL.tstatMA,file="overfitCVoutL.tstatMA")
# write.csv(overfit.CVoutL.tstatMA,file="D:\\RWORK\\EffectPaper\\overfit.CVoutL.tstatMA.csv")



##%%%%%%%%%%%%%%%%%%%%%%%% outL.fstat vs. cv.fstat %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

## Since cross validation tests seem to do a good job of estimating out of sample R^2
## we will now plug various cross validation estimates into f^2 and the associated 
## F-statistic to see whether we can get an equally good estimate for out.fstat
qqFstatS=c(0,2.5,4,5,7,11,200)
cv.fstat=rsqAA[,"cv.fstat"]

## Compare cv.fstat to outL.fstat on rsqQL=rsqQ1&rsqQ2

cvVsOutLFfooQL=MultiBinM(cv.fstat[rsqQL],outL.fstat[rsqQL],binSides=qqFstatS)
cvVsOutLFfooQM=MultiBinM(cv.fstat[rsqQ3|rsqQ4],outL.fstat[rsqQ3|rsqQ4],binSides=qqFstatS)
cvVsOutLFfooQ5=MultiBinM(cv.fstat[rsqQ5],outL.fstat[rsqQ5],binSides=qqFstatS)

cvVsOutLFMA=cbind(t(cvVsOutLFfooQL[[3]]),cvVsOutLFfooQL[[1]],t(cvVsOutLFfooQM[[3]]),cvVsOutLFfooQM[[1]],
					t(cvVsOutLFfooQ5[[3]]),cvVsOutLFfooQ5[[1]])
cvVsOutLFMA=round(cvVsOutLFMA,2)
colnames(cvVsOutLFMA)=c("cv.fstat rsqQ1&2","outL.fstat rsqQ1&2","cv.fstat rsqQ3&4",
					"outL.fstat rsqQ3&4","cv.fstat rsqQ5","outL.fstat rsqQ5")


# > cvVsOutLFMA
#           cv.fstat rsqQ1&2 outL.fstat rsqQ1&2 cv.fstat rsqQ3&4 outL.fstat rsqQ3&4 cv.fstat rsqQ5 outL.fstat rsqQ5
# 0<x<=2.5              1.00              -0.96             1.06               0.58           1.11             1.84
# 2.5<x<=4              3.18              -1.29             3.20               1.73           3.21             3.84
# 4<x<=5                4.47              -0.85             4.48               2.49           4.49             4.98
# 5<x<=7                5.90              -0.50             5.94               3.43           5.93             6.22
# 7<x<=11               8.69               1.20             8.78               5.44           8.82             8.82
# 11<x<=200            18.48               7.40            26.29              16.47          31.86            22.99

## Here we see that for the more overfit models (on rsqQ1&rsqQ2, rsqOutVsIn<0.5), cv.fstat significantly
## overestimates outL.fstat.  This is still true for rsqQ3&rsqQ4, 0.5<=rsqOutVsIn<0.85 though not as badly
## on the most overfit models.  For the least overfit models, this goes the other way, with cv.fstat 
## on average underestimating outL.fstat except for the largest fstat values.

##%%%%%%%%%%%%% save as cvfstat 
overfit.cvVsOutLFMA=cvVsOutLFMA

# write.csv(overfit.cvVsOutLFMA,file="D:\\RWORK\\EffectPaper\\overfit.cvVsOutLFMA.csv")


##%%%%%%%%%%%% Variable level effect size paper computations %%%%%%%%%%%%%%%
##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

##%%%%%%%%%% Compute shrinkage outL/in fstat, r.squared, cv %%%%%
shrinkR=rsqAA[,"outL.r.squaredI"]/rsqAA[,"r.squaredI"]
shrinkFst=rsqAA[,"outL.fstat"]/rsqAA[,"fstat"]
shrinkC=rsqAA[,"cv.r.squaredI"]/rsqAA[,"r.squaredI"]

tt4=abs(rsqAA[,"t value"])>=4
tt3=abs(rsqAA[,"t value"])>=3&abs(rsqAA[,"t value"])<4
tt2=abs(rsqAA[,"t value"])>=2&abs(rsqAA[,"t value"])<3
tt1=abs(rsqAA[,"t value"])>=1&abs(rsqAA[,"t value"])<2

## There are many fewer models than inputs and shrinkR
## and shrinkC are both the same across any individual
## model. Here we find just the the values for individual
## models.


## Define model levels and binsides
mm1=shrinkR!=lagM(shrinkR,-1)
binS=c(-1,0,(1:10)/10)


##%%%%%%%%%%% Test shrinkage estimators %%%%%%%%%%%%%%%
shrinkRS=shrinkR[mm1]
shrinkCS=shrinkC[mm1]

rsqI=rsqAA[mm1,"r.squaredI"]
NN=rsqAA[mm1,"n-obs"]
pp=rsqAA[mm1,"n-inputs"]

rsqHatF=rsqHatAll(rsqI,NN,pp)

shrinkF=rsqHatF*((1/rsqI)%*%matrix(1,1,ncol(rsqHatF)))

## Formula based bias of rsquare shrinkage
## > colnames(shrinkF)
##  [1] "rsqH.S"    "rsqH.W"    "rsqH.E"    "rsqH.OP"   "rsqH.P"    "rsqH.H"    "rsqH.C"   
##  [8] "rsqH.LN"   "rsqH.DS"   "rsqH.BU"   "rsqH.BRW"  "rsqH.BROP" "rsqH.R"
## The out of sample estimators are those from ".LN" to ".R"
biasLN=MultiBin(shrinkF[,"rsqH.LN"],shrinkRS,binSides=binS,xName="shr.LN",yNames="shr.Out")
biasDS=MultiBin(shrinkF[,"rsqH.DS"],shrinkRS,binSides=binS,xName="shr.DS",yNames="shr.Out")
biasBU=MultiBin(shrinkF[,"rsqH.BU"],shrinkRS,binSides=binS[2:length(binS)],xName="shr.BU",yNames="shr.Out")
biasBRW=MultiBin(shrinkF[,"rsqH.BRW"],shrinkRS,binSides=binS[2:length(binS)],xName="shr.BRW",yNames="shr.Out")
biasBROP=MultiBin(shrinkF[,"rsqH.BROP"],shrinkRS,binSides=binS[2:length(binS)],xName="shr.BROP",yNames="shr.Out")
biasR=MultiBin(shrinkF[,"rsqH.R"],shrinkRS,binSides=binS,xName="shr.R",yNames="shr.Out")

noBias=MultiBin(shrinkRS,shrinkRS,binSides=binS,xName="shr.Out",yNames="shr.Out")


## Collect the yMeans into a matix 
biasMY=matrix(NA,nrow(biasCV$yMeans),8)
biasMY[,c(1:4,8)]=cbind(noBias$yMeans,biasCV$yMeans,biasLN$yMeans,biasDS$yMeans,biasR$yMeans)
biasMY[2:nrow(biasMY),5:7]=cbind(biasBU$yMeans,biasBRW$yMeans,biasBROP$yMeans)

rownames(biasMY)=rownames(biasCV$yMeans)
colnames(biasMY)=paste("x=shr.",c("Out","CV","LN","DS","BU","BRW","BROP","R"),sep="")

## Collect the xMeans into a matrix
 biasMX=biasMY-biasMY
biasMX[,c(1:4,8)]=t(rbind(noBias$xMeans,biasCV$xMeans,biasLN$xMeans,biasDS$xMeans,biasR$xMeans))
biasMX[2:nrow(biasMY),5:7]=t(rbind(biasBU$xMeans,biasBRW$xMeans,biasBROP$xMeans))

##%%%%%%%%% Compute stderr for shr.Est-shr.Out %%%%%%%%%%%%%%
DbiasCV=MultiBin(shrinkCS,shrinkCS-shrinkRS,binSides=binS,xName="shr.CV",yNames="shr.CV-shr.Out")

DbiasLN=MultiBin(shrinkF[,"rsqH.LN"],shrinkF[,"rsqH.LN"]-shrinkRS,binSides=binS,xName="shr.LN",yNames="shr.LN-shr.Out")
DbiasDS=MultiBin(shrinkF[,"rsqH.DS"],shrinkF[,"rsqH.DS"]-shrinkRS,binSides=binS,xName="shr.DS",yNames="shr.DS-shr.Out")
DbiasBU=MultiBin(shrinkF[,"rsqH.BU"],shrinkF[,"rsqH.BU"]-shrinkRS,binSides=binS[2:length(binS)],xName="shr.BU",yNames="shr.BU-shr.Out")
DbiasBRW=MultiBin(shrinkF[,"rsqH.BRW"],shrinkF[,"rsqH.BRW"]-shrinkRS,binSides=binS[2:length(binS)],xName="shr.BRW",yNames="shr.BRW-shr.Out")
DbiasBROP=MultiBin(shrinkF[,"rsqH.BROP"],shrinkF[,"rsqH.BROP"]-shrinkRS,binSides=binS[2:length(binS)],xName="shr.BROP",yNames="shr.BROP-shr.Out")
DbiasR=MultiBin(shrinkF[,"rsqH.R"],shrinkF[,"rsqH.R"]-shrinkRS,binSides=binS,xName="shr.R",yNames="shr.R-shr.Out")

## Get the DbiasMY matrix
## Collect the yMeans into a matix 
DbiasMY=matrix(NA,nrow(biasCV$yMeans),8)
DbiasMY[,c(2:4,8)]=cbind(DbiasCV$yMeans,DbiasLN$yMeans,DbiasDS$yMeans,DbiasR$yMeans)
DbiasMY[2:nrow(DbiasMY),5:7]=cbind(DbiasBU$yMeans,DbiasBRW$yMeans,DbiasBROP$yMeans)
DbiasMY[,1]=0
rownames(DbiasMY)=rownames(biasCV$yMeans)
colnames(DbiasMY)=paste("x=shr.",c("Out","CV","LN","DS","BU","BRW","BROP","R"),sep="")

## !!! NOTE !!!
## DbiasMY=(biasMX-biasMY)


## Get stderr matrix
DbiasMstd=matrix(NA,nrow(biasCV$yMeans),8)
DbiasMstd[,c(2:4,8)]=cbind(biasCV$yStdevs,biasLN$yStdevs,biasDS$yStdevs,biasR$yStdevs)
DbiasMstd[2:nrow(biasMY),5:7]=cbind(biasBU$yStdevs,biasBRW$yStdevs,biasBROP$yStdevs)
DbiasMstd[,1]=0

rownames(DbiasMstd)=rownames(biasCV$yMeans)
colnames(DbiasMstd)=paste("x=shr.",c("Out","CV","LN","DS","BU","BRW","BROP","R"),sep="")


##%%%%%%%%%% Save these as csv files for the paper %%%%%%%%%
# write.csv(biasMY,file="biasMY.csv")
# write.csv(biasMX,file="biasMX.csv")

# write.csv(DbiasMY,file="DbiasMY.csv")
# write.csv(DbiasMstd,file="DbiasMstd.csv")

## mean(shr.Out), conditioned on eps<(shr.Est)<=(eps+0.1)
## > round(biasMY,2)
##            x=shr.Out x=shr.CV x=shr.LN x=shr.DS x=shr.BU x=shr.BRW x=shr.BROP x=shr.R
## -0.1<x<=0      -0.05    -0.02    -0.12    -0.06       NA        NA         NA   -0.11
## 0<x<=0.1        0.05     0.06     0.01     0.06    -0.32     -0.33      -0.32   -0.02
## 0.1<x<=0.2      0.15     0.16     0.11     0.13    -0.20     -0.21      -0.19    0.10
## 0.2<x<=0.3      0.25     0.24     0.20     0.23    -0.06     -0.07      -0.06    0.19
## 0.3<x<=0.4      0.35     0.35     0.31     0.34     0.13      0.12       0.14    0.30
## 0.4<x<=0.5      0.45     0.47     0.44     0.46     0.29      0.29       0.30    0.43
## 0.5<x<=0.6      0.55     0.58     0.56     0.56     0.48      0.47       0.47    0.55
## 0.6<x<=0.7      0.65     0.66     0.65     0.66     0.61      0.61       0.61    0.65
## 0.7<x<=0.8      0.75     0.78     0.77     0.77     0.74      0.72       0.73    0.76
## 0.8<x<=0.9      0.85     0.82     0.82     0.81     0.81      0.82       0.82    0.81
## 0.9<x<=1        0.95     0.95     0.95     0.95     0.95      0.95       0.95    0.95
##
## From this we can see that shr.CV, shr.LN, shr.DS and (to a lesser extent)
## shr.R, are fairly unbiased estimators of shr.Out, while  

## stderr of shr.Est-shr.Out, conditioned on eps<(shr.Est)<=(eps+0.1)
## > round(DbiasMstd,3)
##            x=shr.Out x=shr.CV x=shr.LN x=shr.DS x=shr.BU x=shr.BRW x=shr.BROP x=shr.R
## -0.1<x<=0          0    0.016    0.016    0.016       NA        NA         NA   0.015
## 0<x<=0.1           0    0.014    0.015    0.014    0.014     0.014      0.014   0.015
## 0.1<x<=0.2         0    0.011    0.012    0.012    0.010     0.010      0.010   0.012
## 0.2<x<=0.3         0    0.010    0.011    0.010    0.011     0.010      0.011   0.010
## 0.3<x<=0.4         0    0.010    0.010    0.010    0.009     0.009      0.009   0.010
## 0.4<x<=0.5         0    0.008    0.008    0.008    0.008     0.008      0.008   0.008
## 0.5<x<=0.6         0    0.006    0.006    0.006    0.006     0.006      0.006   0.006
## 0.6<x<=0.7         0    0.005    0.005    0.004    0.005     0.005      0.005   0.005
## 0.7<x<=0.8         0    0.004    0.004    0.004    0.004     0.004      0.004   0.004
## 0.8<x<=0.9         0    0.004    0.003    0.003    0.003     0.003      0.003   0.003
## 0.9<x<=1           0    0.002    0.002    0.002    0.002     0.003      0.002   0.002
##
## Here we see that the stderr is pretty similar for all of these
## with it being the largest for those values where the shrinkage
## is the greatest. (Not surprising, since these are the values
## with the most overfitting <=> model variance).


##%%%%%%%%%%%%%%% shr.fstat conditioned on shr.Out %%%%%%%%%%%%%%
##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
shr.fstat=rsqAA[,"outL.fstat"]/rsqAA[,"fstat"]
shr.Out=rsqAA[,"outL.r.squaredI"]/rsqAA[,"r.squaredI"]
shr.CV=rsqAA[,"cv.r.squaredI"]/rsqAA[,"r.squaredI"]

## Get model based shrinkage estimators
rsqHatFL=rsqHatAll(rsqAA[,"r.squaredI"],rsqAA[,"n-obs"],rsqAA[,"n-inputs"])
shrinkFL=rsqHatFL*((1/rsqAA[,"r.squaredI"])%*%matrix(1,1,ncol(rsqHatFL)))

## Get just the good ones
shr.Est=shrinkFL[,c(8,9,13)]

## Condition on ranges of abs(t value)
tt4=abs(rsqAA[,"t value"])>=4
tt3=abs(rsqAA[,"t value"])>=3&abs(rsqAA[,"t value"])<4
tt2=abs(rsqAA[,"t value"])>=2&abs(rsqAA[,"t value"])<3
tt1=abs(rsqAA[,"t value"])>=1&abs(rsqAA[,"t value"])<2



## Other set of conditions
tt3.5=abs(rsqAA[,"t value"])>3.5
tt2.5=abs(rsqAA[,"t value"])>2.5&abs(rsqAA[,"t value"])<=3.5
tt1.5=abs(rsqAA[,"t value"])>1.5&abs(rsqAA[,"t value"])<=2.5
tt0.5=abs(rsqAA[,"t value"])>0.5&abs(rsqAA[,"t value"])<=1.5


## Compute conditional means of shr.fstat for different size levels of
## shr.Out
shrOut2fstat0.5=MultiBin(shr.Out[tt0.5],shr.fstat[tt0.5],binSides=binS)

shrOut2fstat1=MultiBin(shr.Out[tt1],shr.fstat[tt1],binSides=binS)
shrOut2fstat1.5=MultiBin(shr.Out[tt1.5],shr.fstat[tt1.5],binSides=binS)

shrOut2fstat2=MultiBin(shr.Out[tt2],shr.fstat[tt2],binSides=binS)
shrOut2fstat2.5=MultiBin(shr.Out[tt2.5],shr.fstat[tt2.5],binSides=binS)

shrOut2fstat3=MultiBin(shr.Out[tt3],shr.fstat[tt3],binSides=binS)
shrOut2fstat3.5=MultiBin(shr.Out[tt3.5],shr.fstat[tt3.5],binSides=binS)

shrOut2fstat4=MultiBin(shr.Out[tt4],shr.fstat[tt4],binSides=binS)

## Collect this into matrix
shrOut2fstatMY=cbind(noBias$yMeans,shrOut2fstat1$yMeans,shrOut2fstat2$yMeans,shrOut2fstat3$yMeans,shrOut2fstat4$yMeans)
shrOut2fstatMY.5=cbind(noBias$yMeans,shrOut2fstat0.5$yMeans,shrOut2fstat1.5$yMeans,shrOut2fstat2.5$yMeans,shrOut2fstat3.5$yMeans)

ccnm=c("mean shr.Out","1<|t|<=2","2<|t|<=3","3<|t|<=4","4<|t|")
ccnm.5=c("mean shr.Out","0.5<|t|<=1.5","1.5<|t|<=2.5","2.5<|t|<=3.5","3.5<|t|")

## Add column names
colnames(shrOut2fstatMY)=ccnm
colnames(shrOut2fstatMY.5)=ccnm.5 

##%%%%%%%%%%% Compute stddev of differences shr.Out-shr.fstat %%%%%%%%%%%%%%%%%%%%%
DshrOut2fstat0.5=MultiBin(shr.Out[tt0.5],shr.Out[tt0.5]-shr.fstat[tt0.5],binSides=binS)

DshrOut2fstat1=MultiBin(shr.Out[tt1],shr.Out[tt1]-shr.fstat[tt1],binSides=binS)
DshrOut2fstat1.5=MultiBin(shr.Out[tt1.5],shr.Out[tt1.5]-shr.fstat[tt1.5],binSides=binS)

DshrOut2fstat2=MultiBin(shr.Out[tt2],shr.Out[tt2]-shr.fstat[tt2],binSides=binS)
DshrOut2fstat2.5=MultiBin(shr.Out[tt2.5],shr.Out[tt2.5]-shr.fstat[tt2.5],binSides=binS)

DshrOut2fstat3=MultiBin(shr.Out[tt3],shr.Out[tt3]-shr.fstat[tt3],binSides=binS)
DshrOut2fstat3.5=MultiBin(shr.Out[tt3.5],shr.Out[tt3.5]-shr.fstat[tt3.5],binSides=binS)

DshrOut2fstat4=MultiBin(shr.Out[tt4],shr.Out[tt4]-shr.fstat[tt4],binSides=binS)


## Collect this into matrix
DshrOut2fstatMY=cbind(noBias$yMeans,DshrOut2fstat1$yMeans,DshrOut2fstat2$yMeans,DshrOut2fstat3$yMeans,DshrOut2fstat4$yMeans)
DshrOut2fstatMY.5=cbind(noBias$yMeans,DshrOut2fstat0.5$yMeans,DshrOut2fstat1.5$yMeans,DshrOut2fstat2.5$yMeans,DshrOut2fstat3.5$yMeans)
DshrOut2fstatMY[,1]=0
DshrOut2fstatMY.5[,1]=0

## Add column names
colnames(DshrOut2fstatMY)=ccnm
colnames(DshrOut2fstatMY.5)=ccnm.5 


##%%%%%%%%% Collect stdard deviations for differences %%%%%%%%%%%%%%%%%%%%%
DshrOut2fstatMYstd=cbind(noBias$yStdev,DshrOut2fstat1$yStdev,DshrOut2fstat2$yStdev,DshrOut2fstat3$yStdev,DshrOut2fstat4$yStdev)
DshrOut2fstatMYstd.5=cbind(noBias$yStdev,DshrOut2fstat0.5$yStdev,DshrOut2fstat1.5$yStdev,DshrOut2fstat2.5$yStdev,DshrOut2fstat3.5$yStdev)
DshrOut2fstatMYstd[,1]=0
DshrOut2fstatMYstd.5[,1]=0

## Add column names
colnames(DshrOut2fstatMYstd)=ccnm
colnames(DshrOut2fstatMYstd.5)=ccnm.5 


##%%%%%%%%%% Merge Delta yMeans, yStdev matrices %%%%%%%%%%%%%%
DshrOut2fstatMYboth=matrix(0,2*nrow(DshrOut2fstatMY),ncol(DshrOut2fstatMY))
colnames(DshrOut2fstatMYboth)=ccnm

## Create rownames for this matrix
rr=c(rownames(DshrOut2fstatMY),rownames(DshrOut2fstatMY))
rr[2*(1:11)-1]=rownames(DshrOut2fstatMY)
rr[2*(1:11)]="stddev"
rownames(DshrOut2fstatMYboth)=rr
DshrOut2fstatMYboth[2*(1:11)-1,]=DshrOut2fstatMY
DshrOut2fstatMYboth[2*(1:11),]=DshrOut2fstatMYstd


##%%%%%%% Do the same for MY.5 %%%%%%%%%%
DshrOut2fstatMYboth.5=DshrOut2fstatMYboth-DshrOut2fstatMYboth
DshrOut2fstatMYboth.5[2*(1:11)-1,]=DshrOut2fstatMY.5
DshrOut2fstatMYboth.5[2*(1:11),]=DshrOut2fstatMYstd.5


##%%% Drop the first row in the delta matrices, since they are empty
DshrOut2fstatMYboth=DshrOut2fstatMYboth[,2:ncol(DshrOut2fstatMYboth)]
DshrOut2fstatMYboth.5=DshrOut2fstatMYboth.5[,2:ncol(DshrOut2fstatMYboth.5)]

##%%%%%%%%%%%% Save this stuff as .csv files %%%%%%%%%%%%%%%%%%%%%
## shrOut2fstatMY, shrOut2fstatMY.5, 
## DshrOut2fstatMY, DshrOut2fstatMY.5, DshrOut2fstatMYstd, DshrOut2fstatMYstd.5
## DshrOut2fstatMYboth, DshrOut2fstatMYboth.5,

# save(shrOut2fstatMY,file="shrOut2fstatMY") 
# save(shrOut2fstatMY.5,file="shrOut2fstatMY.5") 
# save(DshrOut2fstatMY,file="DshrOut2fstatMY") 
# save(DshrOut2fstatMY.5,file="DshrOut2fstatMY.5") 
# save(DshrOut2fstatMYstd,file="DshrOut2fstatMYstd") 
# save(DshrOut2fstatMYstd.5,file="DshrOut2fstatMYstd.5")
# save(DshrOut2fstatMYboth,file="DshrOut2fstatMYboth") 
# save(DshrOut2fstatMYboth.5,file="DshrOut2fstatMYboth.5")

# write.csv(shrOut2fstatMY,file="shrOut2fstatMY.csv") 
# write.csv(shrOut2fstatMY.5,file="shrOut2fstatMY.5.csv") 
# write.csv(DshrOut2fstatMY,file="DshrOut2fstatMY.csv") 
# write.csv(DshrOut2fstatMY.5,file="DshrOut2fstatMY.5.csv") 
# write.csv(DshrOut2fstatMYstd,file="DshrOut2fstatMYstd.csv") 
# write.csv(DshrOut2fstatMYstd.5,file="DshrOut2fstatMYstd.5.csv")
# write.csv(DshrOut2fstatMYboth,file="DshrOut2fstatMYboth.csv") 
# write.csv(DshrOut2fstatMYboth.5,file="DshrOut2fstatMYboth.5.csv")


##%%%%%%%%%%%%%%%%%%%% shr.CV to shr.fstat %%%%%%%%%%%%%%%%%%%%%%
##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

## Compute conditional means of shr.fstat for different size levels of
## shr.CV
noBias=MultiBin(shr.CV,shr.CV,binSides=binS)

shrCV2fstat1=MultiBin(shr.CV[tt1],shr.fstat[tt1],binSides=binS)
shrCV2fstat2=MultiBin(shr.CV[tt2],shr.fstat[tt2],binSides=binS)
shrCV2fstat3=MultiBin(shr.CV[tt3],shr.fstat[tt3],binSides=binS)
shrCV2fstat4=MultiBin(shr.CV[tt4],shr.fstat[tt4],binSides=binS)

## Collect this into matrix
shrCV2fstatMY=cbind(noBias$yMeans,shrCV2fstat1$yMeans,shrCV2fstat2$yMeans,shrCV2fstat3$yMeans,shrCV2fstat4$yMeans)

ccnm=c("mean shr.CV","1<|t|<=2","2<|t|<=3","3<|t|<=4","4<|t|")

## Add column names
colnames(shrCV2fstatMY)=ccnm

##%%%%%%%%%% Do the same for .5 versions %%%%%%%%%%%%%%%%%%%
shrCV2fstat0.5=MultiBin(shr.CV[tt0.5],shr.fstat[tt0.5],binSides=binS)
shrCV2fstat1.5=MultiBin(shr.CV[tt1.5],shr.fstat[tt1.5],binSides=binS)
shrCV2fstat2.5=MultiBin(shr.CV[tt2.5],shr.fstat[tt2.5],binSides=binS)
shrCV2fstat3.5=MultiBin(shr.CV[tt3.5],shr.fstat[tt3.5],binSides=binS)


## Collect this into matrix
shrCV2fstatMY.5=cbind(noBias$yMeans,shrCV2fstat0.5$yMeans,shrCV2fstat1.5$yMeans,shrCV2fstat2.5$yMeans,shrCV2fstat3.5$yMeans)

ccnm.5=c("mean shr.CV","0.5<|t|<=1.5","1.5<|t|<=2.5","2.5<|t|<=3.5","3.5<|t|")

## Add column names
colnames(shrCV2fstatMY.5)=ccnm.5 



##%%%%%%%%%%%%%%%%%%%% shr.LN to shr.fstat %%%%%%%%%%%%%%%%%%%%%%
##%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
shr.LN=shrinkFL[,"rsqH.LN"]

## Compute conditional means of shr.fstat for different size levels of
## shr.LN with slightly lower lowest level 
shrLN2fstat1=MultiBin(shr.LN[tt1],shr.fstat[tt1],binSides=binS)
shrLN2fstat2=MultiBin(shr.LN[tt2],shr.fstat[tt2],binSides=binS)
shrLN2fstat3=MultiBin(shr.LN[tt3],shr.fstat[tt3],binSides=binS)
shrLN2fstat4=MultiBin(shr.LN[tt4],shr.fstat[tt4],binSides=binS)

## Collect this into matrix
shrLN2fstatMY=cbind(noBias$yMeans,shrLN2fstat1$yMeans,shrLN2fstat2$yMeans,shrLN2fstat3$yMeans,shrLN2fstat4$yMeans)
rownames(shrLN2fstatMY)=rownames(shrLN2fstatMY)

ccnm=c("mean shr.LN","1<|t|<=2","2<|t|<=3","3<|t|<=4","4<|t|")

## Add column names
colnames(shrLN2fstatMY)=ccnm

##%%%%%%%%%%% shr.DS vrs. shr.fstat %%%%%%%%%%%%%%%%%
shr.DS=shrinkFL[,"rsqH.DS"]

## Compute conditional means of shr.fstat for different size levels of
## shr.DS with slightly lower lowest level 
shrDS2fstat1=MultiBin(shr.DS[tt1],shr.fstat[tt1],binSides=binS)
shrDS2fstat2=MultiBin(shr.DS[tt2],shr.fstat[tt2],binSides=binS)
shrDS2fstat3=MultiBin(shr.DS[tt3],shr.fstat[tt3],binSides=binS)
shrDS2fstat4=MultiBin(shr.DS[tt4],shr.fstat[tt4],binSides=binS)

## Collect this into matrix
shrDS2fstatMY=cbind(noBias$yMeans,shrDS2fstat1$yMeans,shrDS2fstat2$yMeans,shrDS2fstat3$yMeans,shrDS2fstat4$yMeans)
rownames(shrDS2fstatMY)=rownames(shrDS2fstatMY)

ccnm=c("mean shr.DS","1<|t|<=2","2<|t|<=3","3<|t|<=4","4<|t|")

## Add column names
colnames(shrDS2fstatMY)=ccnm

##%%%%%%%%%%% shr.R vrs. shr.fstat %%%%%%%%%%%%%%%%%
shr.R=shrinkFL[,"rsqH.R"]

## Compute conditional means of shr.fstat for different size levels of
## shr.R with slightly lower lowest level 
shrR2fstat1=MultiBin(shr.R[tt1],shr.fstat[tt1],binSides=binS)
shrR2fstat2=MultiBin(shr.R[tt2],shr.fstat[tt2],binSides=binS)
shrR2fstat3=MultiBin(shr.R[tt3],shr.fstat[tt3],binSides=binS)
shrR2fstat4=MultiBin(shr.R[tt4],shr.fstat[tt4],binSides=binS)

## Collect this into matrix
shrR2fstatMY=cbind(noBias$yMeans,shrR2fstat1$yMeans,shrR2fstat2$yMeans,shrR2fstat3$yMeans,shrR2fstat4$yMeans)
rownames(shrR2fstatMY)=rownames(shrR2fstatMY)

ccnm=c("mean shr.R","1<|t|<=2","2<|t|<=3","3<|t|<=4","4<|t|")

## Add column names
colnames(shrR2fstatMY)=ccnm



##%%%%%%%%%%% Compute stddev of differences shr.LN-shr.fstat %%%%%%%%%%%%%%%%%%%%%
DshrLN2fstat1=MultiBin(shr.LN[tt1],shr.LN[tt1]-shr.fstat[tt1],binSides=binS)
DshrLN2fstat2=MultiBin(shr.LN[tt2],shr.LN[tt2]-shr.fstat[tt2],binSides=binS)
DshrLN2fstat3=MultiBin(shr.LN[tt3],shr.LN[tt3]-shr.fstat[tt3],binSides=binS)
DshrLN2fstat4=MultiBin(shr.LN[tt4],shr.LN[tt4]-shr.fstat[tt4],binSides=binS)


## Collect this into matrix
DshrLN2fstatMY=cbind(noBias$yMeans,DshrLN2fstat1$yMeans,DshrLN2fstat2$yMeans,DshrLN2fstat3$yMeans,DshrLN2fstat4$yMeans)
DshrLN2fstatMY[,1]=0

## Add column names
colnames(DshrLN2fstatMY)=ccnm


##%%%%%%%%%%% Compute stddev of differences shr.LN-shr.fstat %%%%%%%%%%%%%%%%%%%%%
DshrLN2fstat1=MultiBin(shr.LN[tt1],shr.LN[tt1]-shr.fstat[tt1],binSides=binS)
DshrLN2fstat2=MultiBin(shr.LN[tt2],shr.LN[tt2]-shr.fstat[tt2],binSides=binS)
DshrLN2fstat3=MultiBin(shr.LN[tt3],shr.LN[tt3]-shr.fstat[tt3],binSides=binS)
DshrLN2fstat4=MultiBin(shr.LN[tt4],shr.LN[tt4]-shr.fstat[tt4],binSides=binS)


## Collect this into matrix
DshrLN2fstatMY=cbind(noBias$yMeans,DshrLN2fstat1$yMeans,DshrLN2fstat2$yMeans,DshrLN2fstat3$yMeans,DshrLN2fstat4$yMeans)
DshrLN2fstatMY[,1]=0

## Add column names
colnames(DshrLN2fstatMY)=ccnm

##%%%%%%%%% Collect stdard deviations for differences %%%%%%%%%%%%%%%%%%%%%
DshrLN2fstatMYstd=cbind(noBias$yStdev,DshrLN2fstat1$yStdev,DshrLN2fstat2$yStdev,DshrLN2fstat3$yStdev,DshrLN2fstat4$yStdev)
DshrLN2fstatMYstd[,1]=0

## Add column names
colnames(DshrLN2fstatMYstd)=ccnm

##%%%%%%%%%% Merge Delta yMeans, yStdev matrices %%%%%%%%%%%%%%
DshrLN2fstatMYboth=matrix(0,2*nrow(DshrLN2fstatMY),ncol(DshrLN2fstatMY))
colnames(DshrLN2fstatMYboth)=ccnm

## Create rownames for this matrix
rr=c(rownames(DshrLN2fstatMY),rownames(DshrLN2fstatMY))
rr[2*(1:11)-1]=rownames(DshrLN2fstatMY)
rr[2*(1:11)]="stddev"
rownames(DshrLN2fstatMYboth)=rr
DshrLN2fstatMYboth[2*(1:11)-1,]=DshrLN2fstatMY
DshrLN2fstatMYboth[2*(1:11),]=DshrLN2fstatMYstd

DshrLN2fstatMYboth=DshrLN2fstatMYboth[,2:5]


##%%%%%%%%%%%%%% Save this %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
## !!!! NOTE !!!!
## shr.LN, shr.R are both positively biased (optimistic) as estimators
## of shr.fstat until |t|>4. 
## shr.CV, shr.DS, are positively biased (optimistic) as estimators of 
## shr.fstat until |t|>3.5 
## For larger |t|, all of these are pessimistic for more overfit
## models (shr.Est<0.1) similar to the case for (shr.Out)
## stddev of (shr.Est-shr.fstat) is similar to that for (shr.Out-shr.fstat)
# write.csv(shrCV2fstatMY,file="shrCV2fstatMY.csv")
# write.csv(shrCV2fstatMY.5,file="shrCV2fstatMY.5.csv")
# write.csv(shrDS2fstatMY,file="shrDS2fstatMY.csv")
# write.csv(shrLN2fstatMY,file="shrLN2fstatMY.csv")
# write.csv(shrR2fstatMY,file="shrR2fstatMY.csv")
# write.csv(DshrLN2fstatMYboth,file="DshrLN2fstatMYboth.csv")




